MacTech 1 to 12

home *** CD-ROM | disk | FTP | other *** search

/ MacTech 1 to 12 / MacTech-vol-1-12.toast / Source / MacTech® Magazine / Volume 10 - 1994 / 10.08 Aug 94 / Programmers' Challenge / factor.c next >

Wrap

C/C++ Source or Header | 1994-07-18 | 14.0 KB | 454 lines | [TEXT/MPS ]

/* Bob Boonstra * Solution strategy * Factoring is a field which has been the subject of a * great deal of research because of the implications for * cryptography, especially techniques that depend on * the difficulty of factoring very large numbers. * Therefore, it is possible that some of these * algorithms could be applied to the challenge. * * However, in the event that no mathematician * specializing in the field chooses to enter the * Challenge, this relatively simple solution takes * advantage of some of the simplifying conditions in the * problem statement: * 1) the numbers are relatively small (64 bits, or * ~<20 digits) * 2) the prime factors are even smaller (32 bits, or * ~<10 digits) * * This solution depends on no precomputed information. * It is based on Fermat's algorithm, described in Knuth * Vol II, which is especially well suited to the problem * because it is most efficient when the two p, * * Fermat's algorithm requires ~(p-1)sqrt(n) iterations, * where n=u*v and u~=p*sqrt(n), v~=sqrt(n)/p. * Other algorithms require half as many iterations, but * require more calculation per iteration. * * Fermat's algorithm works as follows: * 1) Let n - u*v, u and v odd primes. * 2) Set a = (u+v)/2 and b = (u-v)/2. * 3) Then n = uv = a**2 - b**2 * 4) Initialize a = trunc(sqrt(n)), b=0, r=a**2-b**2-n * 5) Iterate looking for r==0, with an inner loop that * keeps a=(u+v)/2 constant and increases b=(u-v)/2 * by 1 each iteration until r becomes negative. When * this happens, the halfsum a is increased by 1, and * the difference loop is repeated. * * The algorithm in Knuth uses auxiliary variables x,y * for efficiency, where: * x = 2*a+1 and y = 2*b+1 * This works fine in most cases, but causes overflow * of a longword when x,y are the full 32-bits in size. * So we have augmented the algorithm to deal with this * case. * * This solution also uses an efficient integer sqrt * algorithm due to Ron Hunsinger, and extends that * algorithm to 64 bits. */ #pragma options(assign_registers,honor_register) #define ulong unsigned long #define ushort unsigned short #define kLo16Bits 0xFFFF #define kHiBit 0x80000000UL #define kLo2Bits 3 #define kLo1Bit 1 /* * Macros RightShift2 and RightShift1 shift a 64-bit value * right by 2 and 1 bits, respectively. */ #define RightShift32By2(xL,xH) \ { \ xL >>= 2; \ xL |= (xH & kLo2Bits)<<30; \ xH >>= 2; \ } #define RightShift32By1(xL,xH) \ { \ xL >>= 1; \ xL |= (xH & kLo1Bit)<<31; \ xH >>= 1; \ } /* * Macros Add32To64 (Sub32From64) add (subtract) a 32-bit * value to (from) a 64-bit value. */ #define Add32To64(rL,rH, a) \ temp = rL; \ if ((rL += a) < temp) ++rH; #define Add2NPlus1To64(lowR,highR,a) \ Add32To64(lowR,highR,a); \ Add32To64(lowR,highR,a); \ Add32To64(lowR,highR,1); #define Sub32From64(rL,rH, s) \ temp = rL; \ if ((rL -= s) > temp) --rH; #define Sub2NPlus1From64(lowR,highR,s) \ Sub32From64(lowR,highR,s); \ Sub32From64(lowR,highR,s); \ Sub32From64(lowR,highR,1); /* * Macros Add64 (Sub64) add (subtract) two 64-bit values. */ #define Add64(qL,qH, eL,eH) \ Add32To64(qL,qH,eL); \ qH += eH; #define Sub64(qL,qH, eL,eH) \ Sub32From64(qL,qH, eL); \ qH -= eH; /* * Macro Square64 multiplies a 32-bit value by itself to * produce the square as a 64-bit value. For this solution, * we only need to execute this macro expansion once. */ #define Square64(a,rL,rH,temp) \ { \ ulong lohi,lolo,hihi; \ ushort aHi,aLo; \ \ aHi = a>>16; \ aLo = a; \ \ rL = (lolo = (ulong)aLo*aLo)&kLo16Bits; \ lohi = (ulong)aLo*aHi; \ \ temp = ((lohi&kLo16Bits)<<1) + (lolo>>16); \ rL |= temp<<16; \ \ temp>>=16; \ temp += ((hihi = (ulong)aHi*aHi)&kLo16Bits) + \ (lohi>>(16-1)); \ rH = temp&kLo16Bits; \ \ temp>>=16; \ temp += hihi>>16; \ rH |= temp<<16; \ } /* * Macros LessEqualZero64 and EqualZero64 determine if * 64-bit (signed) values are <= 0 or == 0, respectively. */ #define LessEqualZero64(vL,vH) \ ( (0>(long)vH) || ((0==vH) && (0==vL)) ) #define EqualZero64(vL,vH) \ ((0==vL) && (0==vH)) /* * Macro LessEqual64 determines if one 64-bit quantity is * less than or equal to another. */ #define LessEqual64(uL,uH, vL,vH) \ ( (uH< vH) || ((uH==vH) && (uL<=vL))) /* * Function prototypes. */ ulong sqrt64 (ulong nLo,ulong nHi); void Factor64(ulong lowHalf,ulong highHalf, ulong *prime1Ptr,ulong *prime2Ptr); /* * The solution ... */ /********************************************************* * * * Factor64 * * * *********************************************************/ void Factor64(lowHalf,highHalf,prime1Ptr,prime2Ptr) unsigned long lowHalf,highHalf; unsigned long *prime1Ptr,*prime2Ptr; { register ulong x,y,lowR,highR; register ulong temp; ulong sqrtN; /* * Fermat's algorithm (Knuth) * * Assume n=u*v, u<v, n odd, u,v odd * Let a=(u+v)/2 b=(u-v)/2 n=a**2-b**2 0<=y<x<=n * Search for a,b that satisfy x**2-y**2-n==0 * * NOTE: u,v given as being < 2**32 (fit in one word). * Therefore a,b also are < 2**32 (and fit in one word). * * C1: Set x=2*floor(srt(n))+1, * y=1, * r=floor(sqrt(n))**2-n * x corresponds to 2a+1, y to 2b+1, r to a**2-b**2-n * * C2: if r<=0 goto C4 * (algorithm modified to keep r positive) * C3: r=r-y, y=y+2, * goto C2 * C4: if (r==0) terminate with n = p*q, * p=(x-y)/2, q=(x+y-2)/2 * C5: r=r+x, x=x+2, * goto C3 * * This solution modifies the algorithm to: * (1) reorder arithmetic on r to keep it positive * (2) extend r to 64 bits when necessary * (3) handle the trivial case where one of the primes is 2. */ /* * Handle the trivial case with an even prime factor. */ if (0 == (lowHalf&1)) { *prime1Ptr = 2; *prime2Ptr = (lowHalf>>1) | ((highHalf&kLo1Bit)<<31); return; } /* * Compute truncated square root of input 64-bit number. */ sqrtN = temp = sqrt64(lowHalf,highHalf); Square64(temp,lowR,highR,y); /* * Initialize r to s*s - n, * but calculate n-s*s to keep r positive, and fix * later when it is time to add x to r by calculating * r = x - (n-s*s). */ Sub64(lowHalf,highHalf, lowR,highR); /* * Handle perfect square case. */ if ((0==highHalf) && (0==lowHalf)) { *prime1Ptr = *prime2Ptr = sqrtN; return; } y = 1; highR = 0; /* * Separate out the overflow case where x=2a+1 does not * fit into a long word. */ if ((temp=sqrtN) >= kHiBit-1) goto doLargeX; /* * If sqrt(n) < 0x80000000, then 2*sqrt(n)+1 fits in one * long word. Also, n-trunc(sqrt(n))**2 < 2*trunc(sqrt(n)) * also fits in a long word. */ x = 1+2*temp; lowR = x - lowHalf; x += 2; do { C2: if (lowR<=y) break; lowR -= y; y += 2; } while (true); /* exit when r<=y */ C4: if (y==lowR) { *prime1Ptr = (x-y-2)>>1; *prime2Ptr = (x+y)>>1; return; } lowR += (x-y); y += 2; /* * Fall through to modified algorithm if x overflows a * long word. */ if ((x += 2) < (ulong)0xFFFFFFFF-2) goto C2; /* * Adjust x and y to guarantee they will not overflow. * This requires some extra arithmetic to add 2*a+1 and * subtract 2*b+1, but that is preferable to using two * longs to represent each of x and y. */ x>>=1; y>>=1; goto C3L; doLargeX: /* * x=2*a+1 no longer fits in 32 bits, so we sacrifice a * little loop efficiency and let x=a. * Likewise, we let y=b instead of 2*b+1. */ lowR = x = temp; Add32To64(lowR,highR,x); Sub64(lowR,highR, lowHalf,highHalf); ++x; do { if ( LessEqualZero64(lowR,highR) ) break; C3L: Sub2NPlus1From64(lowR,highR,y); ++y; } while (true); /* exit when lowR,highR<=0 */ C4L: if ( EqualZero64(lowR,highR)) { *prime1Ptr = x-y; *prime2Ptr = x+y; return; } Add2NPlus1To64(lowR,highR,x); ++x; goto C3L; } /********************************************************* * * * sqrt64 * * * *********************************************************/ /* * sqrt_max4pow is the largest power of 4 that can be * represented in an unsigned long. */ #define sqrt_max4pow (1UL << 30) /* * undef sqrt_truncate if rounded sqrts are desired; for * the factoring problem we want truncated sqrts. */ #define sqrt_truncate /* * sqrt64 is based on code posted by Ron Hunsinger * to comp.sys.mac.programmer. Modified to handle 64-bit * values. */ ulong sqrt64 (ulong lowN, ulong highN) { /* * Compute the integer square root of the integer argument * n. Method is to divide n by x computing the quotient x * and remainder r. Notice that the divisor x is changing * as the quotient x changes. * * Instead of shifting the dividend/remainder left, we * shift the quotient/divisor right. The binary point * starts at the extreme left, and shifts two bits at a * time to the extreme right. * * The residue contains n-x**2. * Since (x + 1/2)**2 == x**2 + x + 1/4, * n - (x + 1/2)**2 == (n - x**2) - (x + 1/4) * Thus, we can increase x by 1/2 if we decrease (n-x**2) * by (x+1/4) */ register ulong lowResidue,highResidue; /* n - x**2 */ register ulong lowRoot,highRoot; /* x + 1/4 */ register ulong half; /* 1/2 */ ulong highhalf,lowhalf,temp; lowResidue = lowN; if (0 != (highResidue = highN)) { /* * This code extends the original algorithm from 32 bits to * 64 bits. It parallels the 32-bit code; see below for * comments. */ highRoot = sqrt_max4pow; lowRoot = 0; while (highRoot>highResidue) RightShift32By2(lowRoot,highRoot); Sub64(lowResidue,highResidue, lowRoot,highRoot); /* * The binary point for half is now in the high order of * two 32-bit words representing the 64-bit value. */ lowhalf = lowRoot; highhalf = highRoot; RightShift32By2(lowhalf,highhalf); Add64(lowRoot,highRoot, lowhalf,highhalf); if (0==highhalf) goto sqrt2; half = highhalf<<1; do { if (LessEqual64(lowRoot,highRoot,lowResidue, highResidue)) { highResidue -= highRoot; highRoot += half; } if (0 == (half>>=2)) { half = sqrt_max4pow<<1; goto sqrt2a; } highRoot -= half; highRoot >>= 1; } while (true); sqrt2: /* * The binary point for half is now in the lower of the * two 32-bit words representing the 64-bit value. */ half = lowhalf<<1; do { if ((0==highResidue) && (0==highRoot)) goto sqrt3; if (LessEqual64(lowRoot,highRoot,lowResidue, highResidue)) { Sub64(lowResidue,highResidue, lowRoot,highRoot); Add32To64(lowRoot,highRoot,half); } half >>= 2; sqrt2a: Sub32From64(lowRoot,highRoot,half); RightShift32By1(lowRoot,highRoot); } while (half); } else /* if (0 == highResidue) */ { #ifndef sqrt_truncate if (lowResidue <= 12) return (0x03FFEA94 >> (lowResidue *= 2)) & 3; #else if (lowResidue <= 15) return (0xFFFEAA54 >> (lowResidue *= 2)) & 3; #endif lowRoot = sqrt_max4pow; while (lowRoot>lowResidue) lowRoot>>=2; /* Decrease (n-x**2) by (0+1/4) */ lowResidue -= lowRoot; /* 1/4, with binary point shifted right 2 */ half = lowRoot >> 2; /* x=1. (lowRoot is now (x=1)+1/4.) */ lowRoot += half; /* 1/2, properly aligned */ half <<= 1; /* Normal loop (there is at least one iteration remaining)*/ do { sqrt3: if (lowRoot <= lowResidue) { /* * Whenever we can, decrease (n-x**2) by (x+1/4) */ lowResidue -= lowRoot; lowRoot += half; } /* Shift binary point 2 places right */ half >>= 2; /* x{+1/2}+1/4 - 1/8 == x{+1/2}+1/8 */ lowRoot -= half; /* 2x{+1}+1/4, shifted right 2 places */ lowRoot >>= 1; /* When 1/2 == 0, bin point is at far right */ } while (half); } #ifndef sqrt_truncate if (lowRoot < lowResidue) ++lowRoot; #endif /* * Return value guaranteed to be correctly rounded * (or truncated) */ return lowRoot; }